library(survey)
library(foreign)
library(ggplot2)
library(cowplot)
library(dplyr)

gss.df <- read.dta("~/Downloads/GSS7218_R2.DTA")

#### CLEANING ####

gss.df$con_mil <- as.numeric(gss.df$conarmy) ## dummy variable for great deal of confidence
gss.df$con_mil[gss.df$con_mil > 1] <- 0

## Gender
gss.df$male <- as.numeric(gss.df$sex) ## dummy variable for male
gss.df$male[gss.df$male == 2] <- 0

## Race
gss.df$white <- as.numeric(gss.df$race) ## RACE DUMMY: white 1/black 0/other NA
gss.df$white[gss.df$white == 2] <- 0 
gss.df$white[gss.df$white == 3] <- NA # other - NA

## Party ID
gss.df$dem <- as.numeric(gss.df$partyid) ## PID DUMMY: demorats 1/republicans 0/other NA
gss.df$dem[gss.df$dem >= 5 & gss.df$dem <= 7] <- 0
gss.df$dem[gss.df$dem == 2 | gss.df$dem == 3] <- 1
gss.df$dem[gss.df$dem == 4 | gss.df$dem == 8] <- NA

## Ideology
gss.df$ideo3 <- as.numeric(gss.df$polviews)
gss.df$ideo3[gss.df$ideo3 <= 3 ] <- -1 # liberal
gss.df$ideo3[gss.df$ideo3 == 4] <- 0 # moderate
gss.df$ideo3[gss.df$ideo3 >= 5] <- 1 # conservative

## Education
gss.df$education <- 0
gss.df$education[gss.df$educ < 12] <- 0 # less than high school
gss.df$education[gss.df$educ == 12] <- 1 # high school graduate
gss.df$education[gss.df$educ > 12 & gss.df$educ < 16] <- 2 # some college
gss.df$education[gss.df$educ == 16] <- 3 # college graduate
gss.df$education[gss.df$educ > 16] <- 4 # post-graduate

gss.df$educ_level <- NA
gss.df$educ_level[gss.df$education == 0 | gss.df$education == 1] <- 1
gss.df$educ_level[gss.df$education == 2 | gss.df$education == 3] <- 2
gss.df$educ_level[gss.df$education == 4] <- 3

## Income
gss.df$inc_lv <- NA
gss.df$inc_lv[gss.df$realinc < 25000] <- 1 # less than 25000
gss.df$inc_lv[gss.df$realinc > 24999 & gss.df$realinc < 50000] <- 2 # 25000-50000
gss.df$inc_lv[gss.df$realinc > 49999 & gss.df$realinc < 85000] <- 3 # 50000-85000
gss.df$inc_lv[gss.df$realinc > 84999 & gss.df$realinc < 150000] <- 4 # 85000-150000
gss.df$inc_lv[gss.df$realinc > 149999] <- 5 # more than 150000

## Generations
gss.df$gens <- NA
gss.df$gens[gss.df$cohort >= 1928 & gss.df$cohort <= 1946] <- 1 # Silent: 1928-1946
gss.df$gens[gss.df$cohort >= 1947 & gss.df$cohort <= 1964] <- 2 # Boomer: 1947-1964
gss.df$gens[gss.df$cohort >= 1965 & gss.df$cohort <= 1979] <- 3 # Gen X: 1965-1979
gss.df$gens[gss.df$cohort >= 1980 & gss.df$cohort <= 1996] <- 4 # Millenial: 1980-1996
gss.df$gens[gss.df$cohort >= 1997] <- 5 # Gen Z: 1997+

## Regions
gss.df$regs <- as.numeric(gss.df$region)
gss.df$regs[gss.df$regs <= 2] <- 1 # Northeast
gss.df$regs[gss.df$regs >= 3 & gss.df$regs <= 4] <- 2 # Midwest
gss.df$regs[gss.df$regs >= 5 & gss.df$regs <= 7] <- 3 # South
gss.df$regs[gss.df$regs >= 8] <- 4 # West

## Create weighted survey design object
gss_sd <-
  svydesign(
    id = ~ 1,
    weights = ~ wtssall,
    data = gss.df
  )

#### BY YEAR ####

milconf <- svyby(~ con_mil, ~ year, gss_sd, svymean, na.rm = TRUE)
milconf <- milconf[-c(1,12),]

year_plot_df <- data.frame(time = milconf$year,
                           conf = milconf$con_mil * 100)

year_plot <- ggplot(year_plot_df) +
  geom_rect(aes(xmin=1972, xmax=1974, ymin=-Inf, ymax=Inf), fill = "gray84") +
  geom_rect(aes(xmin=1990, xmax=1992, ymin=-Inf, ymax=Inf), fill = "gray84") +
  geom_rect(aes(xmin=2001, xmax=2018, ymin=-Inf, ymax=Inf), fill = "gray84") +
  geom_line(aes(x = time, y = conf)) + ylim(0,80) +
  xlab("Year") + ylab("'A Great Deal' of Confidence in Military (%)") + 
  theme_bw()
ggsave("gss_year.jpeg", plot = year_plot, dpi = 1000)

#### BY PARTY ####

milconf_pid <- svyby(~ con_mil, ~ year+dem, gss_sd, svymean, na.rm = TRUE)
milconf_pid <- milconf_pid[-c(1,12,33,44),]

pid_plot_df <- data.frame(time = milconf_pid$year[1:30],
                          conf_rep = milconf_pid$con_mil[1:30] * 100,
                          conf_dem = milconf_pid$con_mil[31:60] * 100)

pid_plot <- ggplot(pid_plot_df) +
  geom_rect(aes(xmin = 1970, xmax = 1974, ymin = -Inf, ymax = Inf), 
            fill = "gray95", alpha = 0.15) +
  geom_text(aes(x = 1972, label = "Nixon", y = 80), size = 2.6, vjust = -0.4) +
  geom_text(aes(x = 1975, label = "Ford", y = 80), size = 2.6, hjust = 0.2, vjust = -0.4) +
  geom_rect(aes(xmin = 1977, xmax = 1981, ymin = -Inf, ymax = Inf), 
            fill = "gray95", alpha = 0.15) +
  geom_text(aes(x = 1979, label = "Carter", y = 80), size = 2.6, vjust = -0.4) +
  geom_text(aes(x = 1985, label = "Reagan", y = 80), size = 2.6, vjust = -0.4) +
  geom_rect(aes(xmin = 1989, xmax = 1993, ymin = -Inf, ymax = Inf), 
            fill = "gray95", alpha = 0.15) +
  geom_text(aes(x = 1991, label = "Bush-41", y = 80), size = 2.6, vjust = -0.4) +
  geom_text(aes(x = 1995, label = "Clinton", y = 80), size = 2.6, hjust = -0.1, vjust = -0.4) +
  geom_rect(aes(xmin = 2001, xmax = 2009, ymin = -Inf, ymax = Inf), 
            fill = "gray95", alpha = 0.15) +
  geom_text(aes(x = 2004, label = "Bush-43", y = 80), size = 2.6, hjust = 0.2, vjust = -0.4) +
  geom_text(aes(x = 2013, label = "Obama", y = 80), size = 2.6, vjust = -0.4) +
  geom_rect(aes(xmin = 2017, xmax = 2020, ymin = -Inf, ymax = Inf), 
            fill = "gray95", alpha = 0.15) +
  geom_text(aes(x = 2017, label = "Trump", y = 80), size = 2.6, hjust = -0.1, vjust = -0.4)
pid_plot <- pid_plot +
  geom_ribbon(aes(x = time, ymin = conf_dem, ymax = conf_rep), fill = "gray60") +
  xlab("Year") + ylab("Military Confidence: Reps vs. Dems (%)") + 
  xlim(1970, 2020) + ylim(0, 80) + 
  geom_segment(x = 1978, xend = 1978, y = -5, yend = 80) +
    geom_text(aes(x = 1978, label = "Operation Eagle Claw", y = 0),
              size = 3, hjust = 0.08, vjust = -0.5, angle = 90) +
  geom_segment(x = 1983, xend = 1983, y = -5, yend = 80) +
    geom_text(aes(x = 1983, label = "Grenada", y = 0),
              size = 3, hjust = 0.2, vjust = -0.5, angle = 90) +
  geom_segment(x = 1989, xend = 1989, y = -5, yend = 80) +
    geom_text(aes(x = 1989, label = "Gulf War", y = 0),
              size = 3, hjust = 0.2, vjust = -0.5, angle = 90) +
  geom_segment(x = 1994, xend = 1994, y = -5, yend = 80) +
    geom_text(aes(x = 1994, label = "Bosnia", y = 0),
              size = 3, hjust = 0.2, vjust = -0.5, angle = 90) +
  geom_segment(x = 2000, xend = 2000, y = -5, yend = 80) + 
    geom_text(aes(x = 2000, label = "9/11, OEF", y = 0),
              size = 3, hjust = 0.15, vjust = -0.5, angle = 90) +
  geom_segment(x = 2002, xend = 2002, y = -5, yend = 80) +
    geom_text(aes(x = 2002, label = "OIF, Capture of Saddam Hussein", 
              y = 0), size = 3, hjust = 0.05, vjust = -0.5, angle = 90) +
  geom_segment(x = 2004, xend = 2004, y = -5, yend = 80) +
    geom_text(aes(x = 2004, label = "Samarra Golden Dome, Iraqi Civil War", y = 0),
              size = 3, hjust = 0.05, vjust = -0.5, angle = 90) +
  geom_segment(x = 2006, xend = 2006, y = -5, yend = 80) + 
    geom_text(aes(x = 2006, label = "Iraq Surge, Rumsfeld Removed", y = 0), 
              size = 3, hjust = 0.05, vjust = -0.5, angle = 90) +
  geom_segment(x = 2010, xend = 2010, y = -5, yend = 80) +
    geom_text(aes(x = 2010, label = "Osama bin Laden Killed", y = 0),
              size = 3, hjust = 0.07, vjust = -0.5, angle = 90) +
  geom_segment(x = 2012, xend = 2012, y = -5, yend = 80) +
    geom_text(aes(x = 2012, label = "Fall of Mosul, Benghazi, Crimea", y = 0),
              size = 3, hjust = 0.06, vjust = -0.5, angle = 90) +
  geom_segment(x = 2016, xend = 2016, y = -5, yend = 80) +
    geom_text(aes(x = 2016, label = "Liberation of Mosul and Raqqa", y = 0),
              size = 3, hjust = 0.05, vjust = -0.5, angle = 90) +
  theme_bw()
pid_plot
ggsave("gss_pid.jpeg", plot = pid_plot, dpi = 1000)

#### MEAN IDEOLOGY BY PARTY ####

ideo_dem <- svyby(~ ideo3, ~year+dem, gss_sd, svymean, na.rm = TRUE)
ideo_dem <- ideo_dem[-c(1,2,33,34),]

ideo_plot_df <- data.frame(time = ideo_dem$year[1:30],
                           ideo_rep = ideo_dem$ideo3[1:30],
                           ideo_dem = ideo_dem$ideo3[31:60])

pids <- c("Democrats"="solid", "Republicans"="longdash")
ideo_plot <- ggplot(ideo_plot_df) +
  geom_line(aes(x = time, y = ideo_rep, linetype = "Republicans")) +
  geom_line(aes(x = time, y = ideo_dem, linetype = "Democrats")) +
  labs(x = "Year",
       y = "Ideology (Liberal - Conservative)",
       linetype = "Party ID") +
  scale_linetype_manual(values = pids) +
  xlim(1970,2020) + theme_bw()
ggsave("gss_ideo.jpeg", plot = ideo_plot, dpi = 1000)

#### BY RACE ####

milconf_race <- svyby(~ con_mil, ~ year+white, gss_sd, svymean, na.rm = TRUE)
milconf_race <- milconf_race[-c(1,12,33,44),]

race_plot_df <- data.frame(time = milconf_race$year[1:30],
                           conf_black = milconf_race$con_mil[1:30] * 100,
                           conf_white = milconf_race$con_mil[31:60] * 100)

races <- c("Black"="longdash", "White"="solid")
race_plot <- ggplot(race_plot_df) +
  geom_line(aes(x = time, y = conf_black, linetype = "Black")) +
  geom_line(aes(x = time, y = conf_white, linetype = "White")) +
  labs(x = "Year",
       y = "Military Confidence (%) by Race",
       linetype = "Race") +
  scale_linetype_manual(values = races) +
  xlim(1970, 2020) + ylim(0, 80) + theme_bw()
ggsave("gss_race.jpeg", plot = race_plot, dpi = 1000)

#### BY SEX ####

milconf_sex <- svyby(~ con_mil, ~ year+male, gss_sd, svymean, na.rm = TRUE)
milconf_sex <- milconf_sex[-c(1,12,33,44),]

sex_plot_df <- data.frame(time = milconf_sex$year[1:30],
                          conf_female = milconf_sex$con_mil[1:30] * 100,
                          conf_male = milconf_sex$con_mil[31:60] * 100)

sexes <- c("Male"="longdash", "Female"="solid")
sex_plot <- ggplot(sex_plot_df) +
  geom_line(aes(x = time, y = conf_female, linetype = "Female")) +
  geom_line(aes(x = time, y = conf_male, linetype = "Male")) +
  labs(x = "Year",
       y = "Military Confidence (%) by Sex",
       linetype = "Sex") +
  scale_linetype_manual(values = sexes) +
  xlim(1970, 2020) + ylim(0, 80) + theme_bw()
ggsave("gss_sex.jpeg", plot = sex_plot, dpi = 1000)

#### BY EDUCATION ####

milconf_educ <- svyby(~ con_mil, ~ year+educ_level, gss_sd, svymean, na.rm = TRUE)
milconf_educ <- milconf_educ[-c(1,12,33,44,65,76),]

educ_plot_df <- data.frame(time = milconf_educ$year[1:30],
                           conf_hs = milconf_educ$con_mil[1:30] * 100,
                           conf_cl = milconf_educ$con_mil[31:60] * 100,
                           conf_gd = milconf_educ$con_mil[61:90] * 100)

edlev <- c("High School or Less"="longdash", "Graduate or Higher"="solid", "College"="dotdash")
educ_plot <- ggplot(educ_plot_df) +
  geom_line(aes(x = time, y = conf_hs, linetype = "High School or Less")) +
  geom_line(aes(x = time, y = conf_gd, linetype = "Graduate or Higher")) +
  geom_line(aes(x = time, y = conf_cl, linetype = "College")) +
  xlim(1970,2020) + ylim(0,80) + 
  labs(x = "Year",
       y = "Military Confidence (%) by Education Level",
       linetype = "Education Level") +
  scale_linetype_manual(values = edlev) + theme_bw()
ggsave("gss_educ.jpeg", plot = educ_plot, dpi = 1000)

#### BY PARTY AND EDUCATION ####

milconf_pided <- svyby(~ con_mil, ~year+dem+educ_level, gss_sd, svymean, na.rm = TRUE)
milconf_pided <- milconf_pided[-c(1,12,33,44,65,76,97,108,129,140,161,172),]

pided_plot_dfd <- data.frame(time = milconf_pided$year[1:30],
                             conf_hs = milconf_pided$con_mil[31:60] * 100,
                             conf_gd = milconf_pided$con_mil[151:180] * 100)
pided_plot_dfr <- data.frame(time = milconf_pided$year[1:30],
                             conf_hs = milconf_pided$con_mil[1:30] * 100,
                             conf_gd = milconf_pided$con_mil[121:150] * 100)

pided_plot_d <- ggplot(pided_plot_dfd) +
  geom_ribbon(aes(x = time, ymin = conf_gd, ymax = conf_hs), fill = "gray60") +
  xlab("Year") + ylab("Military Confidence by Education Level (%)") + 
  xlim(1970, 2020) + ylim(0, 80) + ggtitle("Democrats") + theme_bw()
pided_plot_r <- ggplot(pided_plot_dfr) +
  geom_ribbon(aes(x = time, ymin = conf_gd, ymax = conf_hs), fill = "gray60") +
  xlab("Year") + ylab("") + 
  xlim(1970, 2020) + ylim(0, 80) + ggtitle("Republicans") + theme_bw()
pided_plot <- plot_grid(pided_plot_d, pided_plot_r)
ggsave("gss_pided.jpeg", plot = pided_plot, dpi = 1000)

#### BY GENERATION ####

milconf_gen <- svyby(~ con_mil, ~year+gens, gss_sd, svymean, na.rm = TRUE)

gen_plot_df <- data.frame(time = milconf_gen$year[21:32],
                          conf_si = milconf_gen$con_mil[21:32] * 100,
                          conf_bo = milconf_gen$con_mil[53:64] * 100,
                          conf_gx = milconf_gen$con_mil[76:87] * 100,
                          conf_mi = c(NA, milconf_gen$con_mil[88:98] * 100))

gens <- c("Silent Generation"="dotted", "Baby Boomers"="dotdash", "Generation X"="longdash",
          "Millennials"="solid")
gen_plot <- ggplot(gen_plot_df) +
  geom_line(aes(x = time, y = conf_si, linetype = "Silent Generation")) +
  geom_line(aes(x = time, y = conf_bo, linetype = "Baby Boomers")) +
  geom_line(aes(x = time, y = conf_gx, linetype = "Generation X")) +
  geom_line(aes(x = time, y = conf_mi, linetype = "Millennials")) +
  xlim(1995,2020) + ylim(0,80) +
  labs(x = "Year",
       y = "Military Confidence (%) by Generation",
       linetype = "Generations") +
  scale_linetype_manual(values = gens) +
  theme_bw()
ggsave("gss_gens.jpeg", plot = gen_plot, dpi = 1000)

#### BY REGION ####

milconf_reg <- svyby(~ con_mil, ~year+regs, gss_sd, svymean, na.rm = TRUE)
milconf_reg <- milconf_reg[-c(1,12,33,44,65,76,97,108),]

reg_plot_df <- data.frame(time = milconf_reg$year[1:30],
                          conf_ne = milconf_reg$con_mil[1:30] * 100,
                          conf_mw = milconf_reg$con_mil[31:60] * 100,
                          conf_so = milconf_reg$con_mil[61:90] * 100,
                          conf_we = milconf_reg$con_mil[91:120] * 100)

regs <- c("Northeast"="solid", "Midwest"="dotdash", "South"="longdash", "West"="dotted")
reg_plot <- ggplot(reg_plot_df) +
  geom_line(aes(x = time, y = conf_ne, linetype = "Northeast")) +
  geom_line(aes(x = time, y = conf_mw, linetype = "Midwest")) +
  geom_line(aes(x = time, y = conf_so, linetype = "South")) +
  geom_line(aes(x = time, y = conf_we, linetype = "West")) +
  xlim(1970,2020) + ylim(0,80) +
  labs(x = "Year",
       y = "Military Confidence (%) by Region",
       linetype = "Regions") +
  scale_linetype_manual(values = regs) + theme_bw()
ggsave("gss_regs.jpeg", plot = reg_plot, dpi = 1000)

#### BY INCOME ####

milconf_inc <- svyby(~ con_mil, ~year+inc_lv, gss_sd, svymean, na.rm = TRUE)
milconf_inc <- milconf_inc[-c(1,12,33,44,65,92,101),]

inc_plot_df <- data.frame(time = milconf_inc$year[1:30],
                          conf_1 = milconf_inc$con_mil[1:30] * 100,
                          conf_2 = milconf_inc$con_mil[31:60] * 100,
                          conf_3 = c(milconf_inc$con_mil[61:68] * 100, NA, NA, 
                                     milconf_inc$con_mil[69:72] * 100, NA,
                                     milconf_inc$con_mil[73:75] * 100, NA,
                                     milconf_inc$con_mil[76:86] * 100),
                          conf_4 = c(milconf_inc$con_mil[87:89] * 100, NA, NA,
                                     milconf_inc$con_mil[90:110] * 100, NA,
                                     milconf_inc$con_mil[111:113] * 100))
inc_plot_df <- inc_plot_df %>%
  dplyr::select(time, conf_1, conf_2, conf_3, conf_4) %>%
  na.omit()

incs <- c("Less than 25,000"="dotdash", "25,000-50,000"="longdash", "50,000-85,000"="dotted",
          "More than 85,000"="solid")
inc_plot <- ggplot(inc_plot_df) +
  geom_line(aes(x = time, y = conf_1, linetype = "Less than 25,000")) +
  geom_line(aes(x = time, y = conf_2, linetype = "25,000-50,000")) +
  geom_line(aes(x = time, y = conf_3, linetype = "50,000-85,000")) +
  geom_line(aes(x = time, y = conf_4, linetype = "More than 85,000")) +
  xlim(1970,2020) + ylim(0,80) +
  labs(x = "Year",
       y = "Military Confidence (%) by Income Level",
       linetype = "Income Level") +
  scale_linetype_manual(values = incs) + theme_bw()
ggsave("gss_inc.jpeg", plot = inc_plot, dpi = 1000)
